*exchange rate
import excel "${hmrc_datadir}\CBT\Exchange_rate_PennWorld8.1.xlsx",  firstrow clear
drop VariableCode 
rename RegionCode alpha3code
rename YearCode year
rename AggValue exchange_rate
save "${hmrc_datadir}\CBT\Exchange_rate", replace

*tax havens
import delimited "${hmrc_datadir}\CBT\tax havens_iso.csv", varnames(1) clear 
save "${hmrc_datadir}\CBT\tax havens_iso.dta", replace

*Differentiated goods
use "${hmrc_datadir}\Trade\arr.dta" , clear
keep  comcode  sitc
duplicates drop
append using "${hmrc_datadir}\Trade\dis.dta"
keep  comcode  sitc
duplicates drop
append using "${hmrc_datadir}\Trade\imp.dta"
keep  comcode  sitc
duplicates drop
append using "${hmrc_datadir}\Trade\exp.dta"
keep  comcode  sitc
duplicates drop

gen sitc4=sitc
replace sitc4=int(sitc4/10) if sitc4>9999
save "${hmrc_datadir}\Trade\sitc_comcode.dta", replace

insheet using "${hmrc_datadir}\CBT\Rauch_classification_revised.csv", clear
merge 1:m sitc4 using "${hmrc_datadir}\Trade\sitc_comcode.dta"
keep sitc4 comcode con lib _merge
keep if _merge==3
drop _merge
gen com8=comcode
replace com8=int(com8/1000000) if com8>99999999
replace com8=int(com8/10) if com8>99999999
gen diff_con=con=="n"
gen diff_lib=lib=="n"
bysort com8: egen differentiated_con=max(diff_con)
bysort com8: egen differentiated_lib=max(diff_lib)
keep com8 differentiated_con differentiated_lib
gen diff=differentiated_con-differentiated_lib
tab diff
drop diff
duplicates drop
save "${hmrc_datadir}\Trade\differentiated.dta", replace

**distance, shared language and ex-colonial
insheet using "${hmrc_datadir}\CBT\CEPII.csv", clear
drop iso_o
rename iso_d alpha3code
save "${hmrc_datadir}\CBT\CEPII", replace
